package wikiextract.nlp.trainingset.x;

import java.io.IOException;

import wikiextract.util.io.DelimitedReader;
import wikiextract.util.io.DelimitedWriter;

/**
 * Command-line utility that splits delimited files into {@link #CHUNKS}
 * smaller files by distributing the rows round-robin.
 *
 * <p>For an input file {@code F}, the chunks are written next to it as
 * {@code F-0}, {@code F-1}, ... {@code F-(CHUNKS-1)}. Row {@code k}
 * (0-based) of the input ends up in chunk {@code k % CHUNKS}.
 */
public class CreateChunkFiles {

	/** Base directory under which the hard-coded input files are looked up. */
	static String tmpDir = "/projects/pardosa/s2/raphaelh/tmp";
	
	/** Number of chunk files produced per input file; must be positive. */
	static int CHUNKS = 32; //16
	
	/**
	 * Splits the hard-coded attribute files below {@link #tmpDir}.
	 *
	 * @param args ignored
	 * @throws IOException if reading an input file or writing a chunk fails
	 */
	public static void main(String[] args) throws IOException {
		//split(tmpDir + "/wp/atts.int.id-good");
		//split(tmpDir + "/wp/atts.text.id-good");
		split(tmpDir + "/nfb/atts.int.id-good");
		split(tmpDir + "/nfb/atts.text.id-good");
	}
	
	/**
	 * Distributes the rows of {@code file} round-robin over {@link #CHUNKS}
	 * output files named {@code file + "-" + i}.
	 *
	 * <p>The reader and every writer that was successfully opened are closed
	 * even when opening, reading or writing fails part-way through.
	 *
	 * @param file path of the delimited input file
	 * @throws IOException if the input cannot be read or a chunk cannot be written
	 * @throws IllegalStateException if {@link #CHUNKS} is not positive
	 */
	private static void split(String file) throws IOException {
		if (CHUNKS < 1)
			throw new IllegalStateException("CHUNKS must be positive, but is " + CHUNKS);
		DelimitedReader r = new DelimitedReader(file);
		try {
			DelimitedWriter[] w = new DelimitedWriter[CHUNKS];
			try {
				for (int i=0; i < w.length; i++) 
					w[i] = new DelimitedWriter(file + "-" + i);
				String[] t = null;
				// Index of the next target chunk. It is wrapped on every step
				// instead of being incremented without bound, so it cannot
				// overflow to a negative value on very large inputs.
				int j=0;
				while ((t = r.read()) != null) {
					w[j].write(t);
					j = (j + 1) % w.length;
				}
			} finally {
				closeAll(w, 0);
			}
		} finally {
			r.close();
		}
	}
	
	/**
	 * Closes {@code w[from]} and all following writers, skipping slots that
	 * were never opened. The nested try/finally guarantees that a failure
	 * while closing one writer does not prevent the remaining ones from
	 * being closed.
	 */
	private static void closeAll(DelimitedWriter[] w, int from) throws IOException {
		if (from >= w.length)
			return;
		try {
			if (w[from] != null)
				w[from].close();
		} finally {
			closeAll(w, from + 1);
		}
	}
}
